## Cross-validated variables
if (do.origspec) {
    thresholds <- c(10, 30)
    monthsuffs <- paste0('0', 5:8, '.1')
    mainspec.rhs <- "tmin + gdd1000 + edd1000 + prcp + prcp2 + state:ns(year, df=3) | Munic�pio + year"
    weatherpreds <- c('tmin', 'gdd1000', 'edd1000', 'prcp', 'prcp2')
    weathersymbols <- c('\\mu', '\\gamma', '\\kappa', '\\pi', '\\psi')
    weatherlabels <- c("Average Min.", "GDDs / 1000", "EDDs / 1000", "Precip. (m)", "Precip.^2")
    perioddays <- 31 + 30 + 31 + 31
    output.suffix <- "-orig"
} else {
    thresholds <- c(10, 32)
    monthsuffs <- paste0('0', 4:8, '.1')
    mainspec.rhs <- "tmin + tmax + gdd1000 + edd1000 + prcp + prcp2 + state:ns(year, df=3) | Munic�pio + year"
    weatherpreds <- c('tmin', 'tmax', 'gdd1000', 'edd1000', 'prcp', 'prcp2')
    weathersymbols <- c('\\mu_1', '\\mu_2', '\\gamma', '\\kappa', '\\pi', '\\psi')
    weatherlabels <- c("Average Min.", "Average Max.", "GDDs / 1000", "EDDs / 1000", "Precip. (m)", "Precip.^2")
    perioddays <- 30 + 31 + 30 + 31 + 31
    output.suffix <- "-minx"
}

if (!exists('do.configonly') || !do.configonly) {
    ## Load the data
    if (!exists('do.allyears'))
        do.allyears <- F
    if (do.allyears) {
        df <- read.csv("../data/observations-full-all.csv", fileEncoding = "iso-8859-1")
    } else {
        df <- read.csv("../data/observations-full.csv", encoding = "UTF-8")
    }
    df$logyield <- log(df$total.produce / df$total.harvest)
    df$logyield[!is.finite(df$logyield)] <- NA

    df$state <- gsub("^(.+)\\((..)\\)", "\\2", df$Munic�pio)

    ## Combine across months
    if (!exists('do.plus2c') || !do.plus2c) {
        for (col in c('below0', paste0('above', thresholds), 'tmin', 'tmax', 'tavg', 'prcp')) {
            df[, col] <- 0
            for (monthsuff in monthsuffs)
                df[, col] <- df[, col] + df[, paste0(col, monthsuff)]
        }
        df$tmin <- df$tmin / length(monthsuffs)
        df$tmax <- df$tmax / length(monthsuffs)
        df$tavg <- df$tavg / length(monthsuffs)
        df$gdd1000 <- (df[, paste0('above', thresholds[1])] - df[, paste0('above', thresholds[2])]) / 1000
        df$edd1000 <- df[, paste0('above', thresholds[2])] / 1000
    } else {
        stopifnot(thresholds[1] == 10)
        for (col in c('below0', paste0('above', c(10, thresholds[2] - 2)), 'tmin', 'tmax', 'tavg', 'prcp')) {
            df[, col] <- 0
            for (monthsuff in monthsuffs)
                df[, col] <- df[, col] + df[, paste0(col, monthsuff)]
        }

        df$tmin <- df$tmin / length(monthsuffs) + 2
        df$tmax <- df$tmax / length(monthsuffs) + 2
        df$tavg <- df$tavg / length(monthsuffs) + 2
        df$gdd1000 <- (df[, paste0('above', thresholds[1])] - df[, paste0('above', thresholds[2] - 2)] + 2*perioddays) / 1000
        df$edd1000 <- df[, paste0('above', thresholds[2] - 2)] / 1000
    }

    df$prcp2 <- df$prcp^2

    library(readxl)

    all.prices <- read_xlsx("../data/CMO-Historical-Data-Annual.xlsx", sheet=4, skip=6)
    prices <- all.prices[-1, c('...1', 'Coffee, Arabica', 'Coffee, Robusta')]
    names(prices) <- c('year', 'price.arabica', 'price.robusta')
    prices$price.arabica <- as.numeric(as.character(prices$price.arabica))
    prices$price.robusta <- as.numeric(as.character(prices$price.robusta))

    library(dplyr)
    df.variety <- df %>% group_by(Munic�pio) %>% summarize(arabica.portion=mean(ifelse(is.na(arabica.produce), ifelse(is.na(robusta.produce), NA, 0), arabica.produce) / total.produce, na.rm=T))
}

get.price.delayed <- function(subdf) {
    subdf2 <- subdf %>% left_join(df.variety)

    prices$year.p1 <- prices$year + 1
    subdf3 <- subdf2 %>% left_join(prices, by=c('year'='year.p1')) # 2000 gets 1999

    subdf3$price.arabica * subdf3$arabica.portion + subdf3$price.robusta * (1 - subdf3$arabica.portion)
}

get.portion.arabica <- function(df) {
    df$portion.arabica <- NA
    for (region in unique(df$Munic�pio)) {
        subdf <- df[df$Munic�pio == region,]
        portion.arabica <- get.portion.arabica.one(subdf)

        df$portion.arabica[df$Munic�pio == region] <- portion.arabica
    }

    df$portion.arabica
}

get.portion.arabica.one <- function(subdf) {
    portion.arabica <- subdf$arabica.produce / subdf$total.produce
    if (any(!is.na(portion.arabica))) {
        portion.arabica[is.na(portion.arabica)] <- portion.arabica[!is.na(portion.arabica)][1]
    } else
        portion.arabica[is.na(portion.arabica)] <- 1

    portion.arabica
}

get.fit.params <- function(output.suffix) {
    allfits <- read.csv(paste0("../data/allfits", output.suffix, ".csv"), nrows=2)

    param <- names(allfits)[-1]
    fes <- grep("_fe", param)

    param[-fes]
}

get.model.base <- function(df) {
    library(lfe)
    library(splines)
    felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=df)
}
